/*
PROGRAM BY: Teresa Fort
Last edited 04/23/2020

This program makes an industry-hs code dataset 

The data contain a list of all potentia hs10 codes that are used as inputs by plants in this industry



*/ 

%include 'yyyy/pdata.sas';

options obs=max;
    

libname ecroot 'xxxx/data';
%let input=xxxx/input ;

%macro make_hs_prod(y);

%if &y. >= 1992 and &y. < 1997 %then %do;
data cmfprod&y.;
   length sic $ 4 ;
    set ecroot.cmf_prod_match1992;
    sic=substr(curpc,1,4);
    count=1;
    if fk_naics12~=" ";
run;
***************************************;


*A.  ****MAKE INDUSTRY BY HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
proc sort data=cmfprod&y.; by fk_naics12 sic; run;

proc means sum noprint data=cmfprod&y.;
   by fk_naics12 sic;
   var pv;
   output out=prod_conc1(keep=sic fk_naics12 pv) sum()= ;
run;

data bridge(drop=naics);
    set  ecroot.prod_code_bridge_all ;
    if year = &y.;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;
/*
proc sql ;
  create table mat_hs1 as 
  select A.*, B.*
  from Mat_conc1 A left join ecroot.Mat_code_bridge B
    on A.naics=B.naics
    order by naics;
 quit;
*/

 proc sql ;
  create table mat_hs2 as 
  select *
  from prod_conc1 left join bridge 
    on prod_conc1.sic=bridge.sic ;
 quit;
***************************************; 


 
*4. collapse to get all HS codes by estab naics;
***************************************;
data mat_hs3; 
   set mat_hs2;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=mat_hs3; by fk_naics12 hs; run;

proc means sum noprint data=mat_hs3;
    by fk_naics12 hs ;
    var mlevel:; 
    output out=mat_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.sic_out_hs_prod&y.(drop=_freq_);
    set mat_hs4;
    label fk_naics12="Plant industry based on FK NAICS 12" ;
    if hs~=. ;
    year = &y.;
run;
  *note: mlevel variables are the sum of the records for which this relationship mapped at that level;
***************************************;


*B.  ****MAKE FIRM-HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
%if &y.=1992 %then %do;
proc sort data=cmfprod&y.; by firmid sic; run;

proc means sum noprint data=cmfprod&y.;
   by firmid sic;
   var pv;
   output out=mat_firm1(keep=firmid sic pv) sum()= ;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;

 proc sql ;
  create table firm_hs2 as 
  select *
  from mat_firm1 left join bridge 
    on  mat_firm1.sic=bridge.sic ;
 quit;
***************************************; 



*4. collapse to get all HS codes by firmid;
***************************************;
data firm_hs3; 
   set firm_hs2;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=firm_hs3; by firmid hs; run;

proc means sum noprint data=firm_hs3;
    by firmid hs ;
    var mlevel:; 
    output out=firm_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.firm_hs_prods&y.(drop=_freq_);
    set firm_hs4;
    label hs="HS codes that map from sic-based material trailers of firm" ;
    if hs~=. ;
    year = &y.;
run;
%end;
%end;

%if &y. >= 1997 %then %do;
*1) bring in the plant-mat code dataset; 
***************************************;
%if &y. >= 1997 & &y. < 2002 %then %do;
data cmfprod&y.;
   length naics $ 6 ;
    set ecroot.cmf_prod_match1997;
    naics=substr(naicspc,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;

%if &y. >= 2002 & &y. < 2007 %then %do;
data cmfprod&y.;
   length naics $ 6 ;
    set ecroot.cmf_prod_match2002;
    naics=substr(naicspc,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;

%if &y. >= 2007 & &y. < 2012 %then %do;
data cmfprod&y.;
   length naics $ 6 ;
    set ecroot.cmf_prod_match2007;
    naics=substr(naicspc,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;

%if &y. >= 2012 & &y. < 2017 %then %do;
data cmfprod&y.;
   length naics $ 6 ;
    set ecroot.cmf_prod_match2012;
    naics=substr(naicspc,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;
***************************************;


*A.  ****MAKE INDUSTRY BY HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
proc sort data=cmfprod&y.; by fk_naics12 naics; run;

proc means sum noprint data=cmfprod&y.;
   by fk_naics12 naics;
   var pv;
   output out=prod_conc1(keep=naics fk_naics12 pv) sum()= ;
run;

data bridge(drop=sic);
    set  ecroot.prod_code_bridge_all ;
    if year = &y.;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;
/*
proc sql ;
  create table mat_hs1 as 
  select A.*, B.*
  from Mat_conc1 A left join ecroot.Mat_code_bridge B
    on A.naics=B.naics
    order by naics;
 quit;
*/

 proc sql ;
  create table mat_hs2 as 
  select *
  from prod_conc1 left join bridge 
    on prod_conc1.naics=bridge.naics ;
 quit;
***************************************; 


 
*4. collapse to get all HS codes by estab naics;
***************************************;
data mat_hs3; 
   set mat_hs2;
   if match_level=6 then mlevel_6=1;
   if match_level=5 then mlevel_5=1;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=mat_hs3; by fk_naics12 hs; run;

proc means sum noprint data=mat_hs3;
    by fk_naics12 hs ;
    var mlevel:; 
    output out=mat_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.naics_out_hs_prod&y.(drop=_freq_);
   set mat_hs4;
   label fk_naics12="Plant industry based on FK NAICS 12" ;
   if hs~=. ;
run;
  *note: mlevel variables are the sum of the records for which this relationship mapped at that level;
***************************************;


*B.  ****MAKE FIRM-HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
%if &y.=1997 or &y.=2002 or &y.=2007 or &y.=2012 %then %do;
proc sort data=cmfprod&y.; by firmid naics; run;

proc means sum noprint data=cmfprod&y.;
   by firmid naics;
   var pv;
   output out=mat_firm1(keep=firmid naics pv) sum()= ;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;

 proc sql ;
  create table firm_hs2 as 
  select *
  from mat_firm1 left join bridge 
    on  mat_firm1.naics=bridge.naics ;
 quit;
***************************************; 



*4. collapse to get all HS codes by firmid;
***************************************;
data firm_hs3; 
   set firm_hs2;
   if match_level=6 then mlevel_6=1;
   if match_level=5 then mlevel_5=1;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=firm_hs3; by firmid hs; run;

proc means sum noprint data=firm_hs3;
    by firmid hs ;
    var mlevel:; 
    output out=firm_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.firm_hs_prods&y.(drop=_freq_);
    set firm_hs4;
    label hs="HS codes that map from naics material trailers of firm" ;
    if hs~=. ;
    year = &y.;
run;
%end;
%end;
%mend;
  *note: mlevel variables are the sum of the records for which this relationship mapped at that level;
***************************************; 
*do this for all years;
%macro loop1();
    %do y=1992 %to 2016 %by 1;
        %make_hs_prod(&y.);
    %end;
%mend;
%loop1();
